Stratification
Specifically look at IL-2 patients
Start by splitting on survival:
Start with patients who respond to treatment:
Compare clear cell with papillary cancer
cd ../src
import os as os
import pickle as pickle
import subprocess
import pandas as pd
from Processing.Tests import run_feature_matrix, SurvivalTest
pd.set_option('precision',3)
# Free-text drug names grouped by drug family.  The names are lookup keys
# (see `drug_map` below), so unusual spellings such as 'nexavaar' or
# 'gemictiabine' are kept exactly as written -- presumably they mirror how the
# drug was typed into the clinical records; verify before "correcting" them.
drugs = {
    'TKI': [
        'perifosine', 'sunitinib', 'sorafenib', 'pazopanib', 'sutent',
        'tarceva', 'nexavaar', 'sutent (sunitinib)', 'gefitinib', 'nexavar',
        'bay-439006', 'azd', 'iressa', 'sorafenib - nexavar', 'axitinib',
        'sunitinib (sutent)', 'tipifarnib', 'tyrosine kinase inhibitor',
        'votrient', 'zd6474',
    ],
    'VEGF Ab': ['bevacizumab', 'avastin'],
    'mTORi': ['temsirolimus', 'everolimus', 'rad001', 'torisel', 'afinitor'],
    'IL2/IF': [
        'interferon', 'il-2', 'il-2 (high dose)', 'proleukin (il-2)',
        'interleukin-2', 'interferon-alpha', 'interferon alpha',
        # The comma after 'proleukin' was missing, which silently fused it
        # with 'roferon-a' into the single key 'proleukinroferon-a'.
        'intron a', 'alpha interferon', 'proleukin',
        'roferon-a', 'il-2 thearpy (interleukin)', 'high dose interleukin-2',
        'ifn-alpha (intron)', 'interleukin 2-high dose', 'inf',
    ],
    'Chemo': [
        'bortezomib', 'gemictiabine', '5-fluorouracil', 'capecitabine',
        'gemzar', 'thalidomide', 'nab-rapamycin', 'capecitabin',
        'gemcitabine', 'xeloda',
    ],
    'Vaccine': ['oncophage', 'oncophage vaccine'],
}

# Inverse lookup: drug name -> family.  The inner loop variable is named
# `members` so that it does not shadow the `drugs` dictionary itself.
drug_map = {drug: family for family, members in drugs.items() for drug in members}
# Directory that holds one sub-directory per analysis run.
result_path = '/scratch/TCGA/Firehose__2012_01_16/ucsd_analyses'

# The second entry of the alphabetically sorted listing is the run used here.
run_dir = sorted(os.listdir(result_path))[1]

# NOTE: unpickling executes code embedded in the file, so this must only ever
# be pointed at run objects produced by a trusted pipeline.
# The file is opened in a `with` block so the handle is closed after loading.
with open(os.path.join(result_path, run_dir, 'RunObject.p'), 'rb') as run_file:
    run = pickle.load(run_file)

# Everything below is scoped to the 'KIRC' cancer of that run.
cancer = run.load_cancer('KIRC')
clinical = cancer.load_clinical()
global_vars = cancer.load_global_vars()
clinical.clinical.tumor_stage.value_counts()

# Molecular data sets; the mutation and copy-number objects are uncompressed
# right after loading.
mut = cancer.load_data('MAF')
mut.uncompress()
meth = cancer.load_data('Methylation')
cn = cancer.load_data('CN_broad')
cn.uncompress()
rna = cancer.load_data('mRNASeq')
rppa = cancer.load_data('RPPA')
# --- Tumor stage ------------------------------------------------------------
# Rewrite 'stge' as 'stage' inside every stage label, then plot label counts.
stage = clinical.clinical.tumor_stage.map(lambda label: label.replace('stge', 'stage'))
stage_counts = stage.value_counts().sort_index()
stage_counts.plot(kind='bar')

# --- Tumor grade ------------------------------------------------------------
# Left panel: grade labels as recorded.  Right panel: the same labels cut down
# to their first two characters (missing values dropped first).
fig, axs = subplots(1,2, figsize=(12,4))
grade = clinical.clinical.tumor_grade
grade_views = [grade, grade.dropna().map(lambda label: label[:2])]
for v, panel in zip(grade_views, axs):
    v.value_counts().sort_index().plot(kind='bar', title=v.name, ax=panel);
axs[0].set_ylabel('# of Patients')

# --- Age --------------------------------------------------------------------
# describe() of age within each stage, side by side with the overall summary.
age = clinical.clinical.age
age_summaries = {}
for label in stage.unique():
    patients = stage[stage == label].index
    age_summaries[label] = age[patients].describe()
by_stage = pd.DataFrame(age_summaries)
all_stage = pd.Series(age.describe(), name='All')
by_stage.join(all_stage).astype(object)

age.hist()
ylabel('# of Patients')
xlabel('Age')

# --- Lymph node status and gender -------------------------------------------
lymphnode_counts = clinical.clinical.lymphnode_stage.value_counts()
lymphnode_counts.plot(kind='bar')
ylabel('# of Patients')
xlabel('Status')

gender_counts = clinical.clinical.gender.value_counts()
gender_counts.plot(kind='bar')
ylabel('# of Patients')
xlabel('Gender')

# --- Calcium level ----------------------------------------------------------
# Stage x calcium contingency table, then the 'stage iv' row in a fixed
# low / normal / elevated order.
calcium_by_stage = pd.crosstab(stage, clinical.clinical.calcium_level)
calcium_by_stage
s = calcium_by_stage.ix['stage iv']
s[['low','normal','elevated']].plot(kind='bar')
ylabel('# of Patients')
xlabel('Status')

# --- ECOG score and hemoglobin ----------------------------------------------
ecog = clinical.clinical.eastern_cancer_oncology_group.dropna()
pd.crosstab(stage, ecog)

hemoglobin_counts = clinical.clinical.hemoglobin.value_counts()
hemoglobin_counts.plot(kind='bar')
ylabel('# of Patients')
xlabel('Hemoglobin Level')
import Reports.Figures as F
reload(F)
from Reports.Figures import *
from Processing.Tests import *
from Reports.NotebookTools import *
def draw_survival_curves(feature, surv, assignment=None, filename='tmp.png', show=False,
                         title=True, labels=['No Mutation', 'Mutation'],
                         colors=['blue','red'], ann=None, show_legend=True, q=.25):
    """Render survival curves for ``feature`` into a PNG file through R.

    One panel is drawn per distinct (non-missing) value of ``assignment``;
    with ``assignment=None`` every patient goes into a single panel.

    Parameters
    ----------
    feature : pandas Series
        Values defining the strata of the curves.  A feature that
        ``get_vec_type`` reports as 'real' and that has more than five
        distinct values is binned with ``to_quants(..., q=q)`` before fitting.
    surv : pandas object
        Survival data; the 'days' and 'event' entries are read from it here.
    assignment : pandas Series, optional
        Grouping used to split the patients into panels.
    filename : str
        Path handed to R's ``png`` device.
    show : bool
        When true, ``Show(filename)`` is returned.
    title : bool
        Accepted for compatibility with existing callers; not used.
    labels, colors : list of str
        Legend entries and line colours.  Both are replaced by fixed
        quantile labels/colours for binned real-valued features.  The default
        lists are only ever rebound, never mutated.
    ann : str, optional
        'p' writes 'logrank ' plus the last comma-separated field of the
        printed ``survdiff`` result into the panel; any other non-None value
        is written as given.
    show_legend : bool or 'out'
        True draws the legend inside the panel, 'out' draws it to the right
        of the plot region, anything else draws none.
    q : float
        Quantile forwarded to ``to_quants``.

    Returns
    -------
    ``Show(filename)`` when ``show`` is true, otherwise None.
    """
    num_panels = 1 if assignment is None else len(assignment.unique())

    r.png(filename=filename, width=200*(num_panels+1), height=300, res=75)
    fmla = robjects.Formula('Surv(days, event) ~ feature')
    r.par(mfrow=r.c(1, num_panels))
    r.par(mar=r.c(4,5,4,1))
    r.par(xpd=True)

    # Binned continuous features get their own colour scheme and labels.
    if (get_vec_type(feature) == 'real') and (len(feature.unique()) > 5):
        colors = ['blue','orange','red']
        if q == .5:
            labels = ['Bottom 50%', 'Top 50%']
        else:
            labels = ['Bottom {}%'.format(int(q*100)), 'Normal', 'Top {}%'.format(int(q*100))]

    # R colour vector shared by the curves and the legend.
    ls = r.c(*colors)

    def plot_me(sub_f, label):
        """Fit and draw the curves of one panel, titled ``label``."""
        if (get_vec_type(sub_f) == 'real') and (len(sub_f.unique()) > 5):
            sub_f = to_quants(sub_f, q=q)
        m = get_cox_ph_ms(surv, sub_f, return_val='model', formula=fmla)
        # The data frame the model was fitted on is recovered from its call.
        r_data = m.rx2('call')[2]
        s = survival.survdiff(fmla, r_data)
        # Last comma-separated field of the final paragraph of the printout.
        p = str(s).split('\n\n')[-1].strip().split(', ')[-1]

        r.plot(survival.survfit(fmla, r_data), lty=1, col=ls, lwd=4, cex=1.25,
               xlab='Years to Event', ylab='Survival')
        r.title(label, cex=3.)
        if ann == 'p':
            r.text(0, labels='logrank ' + p, pos=4)
        elif ann is not None:
            r.text(0, labels=ann, pos=4)

    if assignment is None:
        # Put every patient into one group.  Built explicitly as a Series so
        # the `.ix` / boolean indexing below does not depend on
        # np.ones_like() handing back a pandas object.
        assignment = pd.Series(1, index=feature.index)

        def name(v):
            return feature.name
    else:
        def name(v):
            return str(assignment.name) + ' = ' + str(v)

    if show_legend == 'out':
        # Widen the right margin to make room for the outside legend.
        r.par(xpd=True, mar=r.c(4,5,5,8))

    for value in sorted(assignment.ix[feature.index].dropna().unique()):
        members = assignment[assignment == value].index
        plot_me(feature.ix[members], name(value))

        # NOTE(review): the indentation of the original was lost.  The legend
        # code reads the loop variable `value`, so it is kept inside the
        # per-panel loop (one legend per panel) -- confirm this nesting.
        # `== True` is deliberate: the string 'out' is truthy as well.
        if show_legend == True:
            # Few events in this panel -> legend goes low and to the left;
            # otherwise it goes to the upper middle of the panel.
            mean_s = surv.ix[:,'event'].ix[members].mean()
            if mean_s < .4:
                r.legend(surv.ix[:,'days'].max() * .05 / 365., .45, labels,
                         lty=1, col=ls, lwd=3, bty='o')
            else:
                r.legend(surv.ix[:,'days'].max() * .4 / 365, .9, labels,
                         lty=1, col=ls, lwd=3, bty='o')
        elif show_legend == 'out':
            r.legend(surv.ix[:,'days'].max() * 1.1 / 365, .9, labels,
                     lty=1, col=ls, lwd=3, bty='o')

    r('dev.off()')
    if show:
        return Show(filename)
# Display names for the four tumor stages; any other raw value becomes NaN.
stage = clinical.clinical.tumor_stage.map({'stge i': 'Stage I', 'stge ii': 'Stage II',
                                           'stge iii': 'Stage III', 'stge iv': 'Stage IV'})
surv = clinical.survival.survival_5y

# `f` is the same object as `stage`, so the rename below also renames `stage`;
# later cells use that name as a panel-title prefix.
f = stage
f.name = 'Overall Survival'

# Per-stage survival summary with two-level column headers.
t = get_surv_fit(surv, f)
t.columns = pd.MultiIndex.from_tuples([('','# Patients'), ('','# Deaths'),
                                       ('', 'Median OS'), ('95% Confidence Int.', 'Lower'),
                                       ('95% Confidence Int.', 'Upper')])

# Legend labels are paired with the colours by position.  unique() lists
# values in order of first appearance and can include NaN, so the labels are
# sorted (Stage I < II < III < IV) with NaN dropped.
# NOTE(review): this assumes the fitted strata come out in sorted order,
# as with R factor levels -- confirm.
stage_labels = sorted(f.dropna().unique())
f = draw_survival_curves(f, surv, colors=['green','blue','orange','red'],
                         labels=stage_labels, show=True)
fig_tab(f, t)
# Map every recorded drug name onto its family, then build a patient x family
# table that is True when the patient has at least one drug of that family.
drugs_types = drugs.keys()
drug_categories = clinical.drugs.drugname.map(drug_map)
given_by_family = {}
for family in drugs_types:
    family_hits = (drug_categories == family).groupby(level=0).sum()
    given_by_family[family] = family_hits > 0
drug_given = pd.DataFrame(given_by_family)

# Left: patients with any medication, by stage.  Right: patients per family.
fig, axs = subplots(1,2, figsize=(12,4))
any_drug = drug_given.sum(1) > 0
crosstab(stage, any_drug)[True].plot(kind='bar', ax=axs[0])
axs[0].set_ylabel('# of Patients')
axs[0].set_title('Patients Receiving Medication By Stage')
family_totals = drug_given.sum()
family_totals.plot(kind='bar', ax=axs[1]);
axs[1].set_title('Drug Categories');
drug_given.sum()

# The remaining summaries are restricted to Stage IV patients.
stage_iv_drugs = drug_given.ix[stage.index[stage == 'Stage IV']]

s = stage_iv_drugs.sum()
s.plot(kind='bar')
ylabel('# of Patients')

# How many drug families each Stage IV patient received (first five counts).
families_per_patient = stage_iv_drugs.dropna().sum(1)
n = families_per_patient.value_counts()[:5]
n.name = 'Number of Medications'
n.plot(kind='bar')
ylabel('# of Patients')

# One '0'/'1' character per family column, joined into a code per patient.
gc = drug_given.astype(int).astype(str).apply(lambda row: ''.join(row), axis=1)
gc.name = 'drugs'

# Stage IV patients with exactly one drug family, and which family it was.
one_drug = families_per_patient == 1
vc = drug_given.ix[one_drug[one_drug].index].sum().order()
vc.plot(kind='bar')
ylabel('# of Patients')
surv = clinical.survival.survival_5y

# Among the single-family Stage IV patients, pick those flagged for IL2/IF and
# pull their clinical annotations and the first two timeline columns.
p = drug_given.ix[one_drug[one_drug].index]['IL2/IF']
il2_patients = p[p].index
clin_columns = ['tumor_grade','calcium_level','histo_grade','neo_status',
                'hemoglobin','gender','tissuesourcesite','age']
clin = clinical.clinical.ix[il2_patients][clin_columns]
tf = clinical.timeline.ix[il2_patients].sort(columns='daystodeath').ix[:,:2]
tf_clin = tf.join(clin)
tf_clin

# Same table, limited to patients with complete mutation calls.
c = mut.df.ix[:,tf.index].dropna(1).columns
tf_clin.ix[c]

# Two hand-picked patient groups, coded 1 and 2.
p1 = array(['TCGA-CJ-4923','TCGA-B8-4143'])
p2 = array(['TCGA-CW-5580','TCGA-CJ-5682','TCGA-CW-5591','TCGA-CZ-5460'])
pd.Series(np.ones_like(p1), p1)
group_one = pd.Series(np.ones(len(p1)), p1)
group_two = 2.*pd.Series(np.ones(len(p2)), p2)
pat = pd.concat([group_one, group_two])
pat.name = 'IL2'
draw_survival_curves(pat, surv, show=True)

# Group 1 is labelled 'bad', group 2 'good'.
pat = pat.map({1: 'bad', 2: 'good'})
p = mut.df.ix[:,pat.index].dropna(axis=1)


def _mutated_gene_counts(group):
    """Per-gene mutation totals over one patient group, zeros removed, ascending."""
    members = pat[pat == group].index
    totals = p.ix[:, members].dropna(axis=1).sum(1)
    return totals[totals > 0].order()


good_counts = _mutated_gene_counts('good')
bad_counts = _mutated_gene_counts('bad')

# Genes side by side; a gene absent from one group counts as 0 there.
gb = pd.concat([good_counts, bad_counts], axis=1, keys=['good','bad']).fillna(0)
top_bad = gb.bad.order().dropna().tail(2)
top_bad
gb.ix[top_bad.index]
gb[(gb.good - gb.bad).abs() > 2]
# `metastatic` was only defined much further down the file (same expression),
# so a top-to-bottom run failed here with a NameError.
metastatic = stage[stage == 'Stage IV'].index

mut.df.ix['SETD2'].ix[metastatic].value_counts()
pd.crosstab(pd.Series(ones_like(mut.df.columns), mut.df.columns), stage)

# Code patients as 1 = SETD2 only, 2 = BAP1 only, 3 = both.
# SETD2 is thresholded with `> 0` like BAP1, so that a SETD2 entry above 1
# cannot be mistaken for the BAP1 code.
# NOTE(review): this only matters if mut.df holds counts rather than 0/1.
f = pd.concat([1.*(mut.df.ix['SETD2'] > 0), 2.*(mut.df.ix['BAP1'] > 0)], axis=1).sum(1)
# Keep patients with exactly one of the two genes mutated.
f = f[(f > 0) & (f < 3)]
f.name = 'SETD2 vs. BAP1'

draw_survival_curves(mut.df.ix['ACSBG2'].ix[metastatic], surv, show=True)
# A stray '}' in front of this call made the whole file unparsable.
draw_survival_curves(f.ix[metastatic], surv, show=True, labels=['SETD2','BAP1'])

(gb.bad - gb.good).order().dropna()

# Test every RPPA feature against the 'good'/'bad' grouping in `pat` and sort
# the results in ascending order.
s = pd.Series({m: anova(pat, vec) for m, vec in rppa.features.iterrows()})
s = s.order()

# Survival split (at the median, q=.5) on the first feature of that ranking.
f = rppa.features.ix[s.index[0], metastatic]
f.name = str(f.name)
draw_survival_curves(f, surv, show=True, q=.5)
violin_plot_pandas(pat, rppa.features.ix[s.index[0]]);
s
def split_cols(s):
    """Expand a '0'/'1' code (one character per drug_given column) into the
    comma-separated names of the columns flagged '1'."""
    return ','.join(d for i, d in enumerate(drug_given.columns) if s[i] == '1')


# Survival summary for the Stage IV patients on exactly one drug family,
# indexed by readable family names instead of the raw codes.
single_drug_codes = gc[one_drug[one_drug].index]
t = get_surv_fit(surv, single_drug_codes)
t.columns = pd.MultiIndex.from_tuples([('','# Patients'), ('','# Deaths'),
                                       ('', 'Median OS'), ('95% Confidence Int.', 'Lower'),
                                       ('95% Confidence Int.', 'Upper')])
t.index = [split_cols(code) for code in t.index]
t

# Legend entries: families with at least one such patient, in reverse column order.
legend_labels = [c for c in drug_given.columns if vc[c] > 0][::-1]
draw_survival_curves(single_drug_codes, surv,
                     colors=['red','orange','green','purple','blue','yellow'],
                     labels=legend_labels, show=True, show_legend='out')

gc.value_counts()

# Drug combination per patient versus stage, skipping the first row of the table.
drug_lists = gc.apply(split_cols)
pd.crosstab(drug_lists, stage).ix[1:].T.plot(kind='bar')
# Reload the molecular data sets; mutation and copy-number objects are
# uncompressed right after loading.
mut = cancer.load_data('MAF')
meth = cancer.load_data('Methylation')
cn = cancer.load_data('CN_broad')
rna = cancer.load_data('mRNASeq')
rppa = cancer.load_data('RPPA')
mut.uncompress()
cn.uncompress()

# The VHL row of the mutation, methylation and expression tables, each given
# a distinguishing name.
vhl_mut = mut.df.ix['VHL']
vhl_meth = meth.df.ix['VHL']
vhl_rna = rna.df.ix['VHL']
for series, label in ((vhl_mut, 'VHL_mut'), (vhl_meth, 'VHL_meth'), (vhl_rna, 'VHL_rna')):
    series.name = label

# First row under 'Deletion' / '9p21.3' in the copy-number table.
deletions = cn.df.ix['Deletion']
cdk_del = deletions.ix['9p21.3'].ix[0]
cdk_del.name = 'del_band'

# NOTE(review): draw_survival_curves_split is not defined in this part of the
# file; presumably it comes in through one of the star imports -- confirm.
draw_survival_curves_split(cdk_del, clinical.clinical.tumor_stage, surv, ann='p', show=True)
draw_survival_curves_split(vhl_mut, stage, surv, ann='p', q=.25, show=True)
draw_survival_curves(f, surv, ann='p', show=True)
# Number of metastatic patients carrying a mutation in each gene, ascending;
# only genes hit in more than two metastatic patients are kept.
met = (mut.df.ix[:,metastatic] > 0).sum(1).order()
met = met[met>2]

# The same count over all patients, restricted to those genes.
g = (mut.df > 0).sum(1).order()
g = g.ix[met.index]

# Metastatic / non-metastatic / total counts side by side.
m = pd.concat([met, g-met, g], keys=['Metastatic','Non-Metastatic', 'All'], axis=1)
# An unfinished `pd.crosstab(` call stood here and left the file unparsable;
# its intended arguments are unknown, so only the table display is kept.
m

# Bar chart of the genes mutated in more than two metastatic patients.
g = (mut.df.ix[:,metastatic] > 0).sum(1).order()
g = g[g>2]
g.plot(kind='bar')
ylabel('# of Patients')
g
def pathway_plot(df, clip=True, ax=None):
    """Draw a matrix with ``memo_plot`` and add marginal bar charts.

    Empty rows and columns are removed, rows are ordered by decreasing row
    sum, and columns are ordered by their concatenated cell values.  When
    ``clip`` is true and more than 20 rows remain, everything after the first
    ten rows is folded into a single row named 'rest' (capped at 1).

    Parameters
    ----------
    df : pandas DataFrame
        Matrix to draw.
    clip : bool
        Whether to fold the trailing rows into 'rest'.
    ax : matplotlib axes, optional
        Axes to draw on; a new figure sized from the matrix shape is created
        when omitted.
    """
    # Keep only rows and columns that contain something.
    nonempty_rows = df.sum(1) > 0
    nonempty_cols = df.sum() > 0
    df = df.ix[nonempty_rows, nonempty_cols]

    # Rows by decreasing total; columns by their cell values read top to bottom.
    row_order = df.sum(1).order(ascending=False).index
    df = df.ix[row_order]
    column_keys = df.apply(lambda col: ''.join(map(str, col)))
    column_order = sort(column_keys).index[::-1]
    df = df[column_order]

    if clip and (df.shape[0] > 20):
        rest = Series(df.ix[10:].sum().clip_upper(1.), name='rest')
        df = df.ix[:10].append(rest)

    if ax is not None:
        fig = ax.get_figure()
    else:
        fig, ax = plt.subplots(figsize=(df.shape[1]*.2,df.shape[0]*.5))

    memo_plot(df, ax=ax)

    # Column totals, scaled to the largest, as bars below the matrix.
    col_totals = df.sum()
    ax.bar(arange(len(df.columns)) - .3, col_totals / col_totals.max(), bottom=-1.5,
           width=.6, alpha=.5)

    # Row totals (reversed row order), scaled, as bars right of the matrix.
    n_cols = df.shape[1]
    row_totals = df.sum(1)[::-1]
    ax.barh(arange(len(row_totals)) - .3, (row_totals / row_totals.max())*n_cols*.25,
            left=n_cols - .2, height=.6, alpha=.5)

    ax.set_frame_on(False)
    ax.tick_params(right='off')
    fig.tight_layout()
# Matrix plot of the genes whose count in `g` exceeds three, over the
# metastatic patients, without folding rows into 'rest'.
frequent_genes = g[g>3].index
pathway_plot(mut.df.ix[frequent_genes, metastatic], clip=False)

# Per-gene totals over the metastatic patients.
counts = mut.df.ix[:,metastatic].sum(1)
counts.name ='count'

# NOTE(review): the call that would rebuild `t` is commented out, so `t` is
# whatever table was assigned last; confirm it has a ('Full','LR') column.
#t = run_feature_matrix(mut.df.ix[g[g>3].index,metastatic], test)
t.join(counts).sort(columns=[('Full','LR')])

# Scratch arithmetic kept from the notebook (evaluated twice there).
233 + 16 + 4 + 74
233 + 16 + 4 + 74
# Switch to the `survival` endpoint for the stage-split curves below.
surv = clinical.survival.survival

# Combined code: 0 = neither, 1 = PBRM1 only, 2 = BAP1 only, 3 = both.
has_pbrm1 = mut.features.ix['PBRM1']>0
has_bap1 = mut.features.ix['BAP1']>0
f = has_pbrm1 + (2.*has_bap1)
f.name = 'f'
draw_survival_curves(f, surv, assignment=stage, ann='p',
                     colors=['green','blue','red', 'purple'],
                     show=True, show_legend=False)

# Same stage-split view for the WNT_SIGNALING feature row.
f = mut.features.ix['WNT_SIGNALING']
draw_survival_curves(f, surv, assignment=stage, ann='p', show=True)
# Patients whose stage label is 'Stage IV'.
metastatic = stage[stage == 'Stage IV'].index

survival_test = 'survival_5y'
covariates = ['age', ('mutation', 'rate_non')]


def remerge(s):
    """Return a str column label unchanged; join any other label with '__'."""
    if type(s) == str:
        return s
    return '__'.join(s)


# Covariate table: global variables joined with the clinical table and the
# 9p21 deletion, reduced to the chosen covariates, with flat column names.
merged = global_vars.join(clinical.clinical, how='outer').join(cdk_del)
cov_df = merged[covariates]
cov_df = cov_df.rename(columns=remerge)

# Survival test against the chosen endpoint; every feature is accepted.
surv = clinical.survival[survival_test]
test = SurvivalTest(surv, cov_df)
test.name = survival_test
test.check_feature = lambda s: True

# Mutation features of Stage IV patients with complete data, limited to
# features whose row sum is above six.
stage_iv = stage[stage.isin(['Stage IV'])].index
df = mut.features.ix[:, stage_iv].dropna(axis=1)
counts = Series(df.sum(1), name='counts')
df = df[counts > 6]

#del get_cox_ph_ms.null_model
# Run the test on every remaining feature and sort by the ('Full','LR') column.
mut_met = run_feature_matrix(df, test)
mut_met = mut_met.join(counts).sort(columns=[('Full','LR')])
mut_met.ix['PBRM1']
mut_met.head(25).astype(object)
import Reports.Figures as F
reload(F)
from Reports.Figures import *
from Reports.NotebookTools import *
def draw_me(f):
    """Build the stacked survival report for mutation feature ``f``.

    Three survival figures are drawn: split by stage, over all patients, and
    over the columns of the module-level ``df`` only.  If ``f`` is also a key
    of ``run.gene_sets``, a pathway plot of its genes is saved to 'tmp1.png'
    and shown next to the last survival figure; if that step fails for any
    reason, the survival figures alone are stacked.

    Parameters
    ----------
    f : label of a row in ``mut.features``.

    Returns
    -------
    The result of ``stack`` over the assembled figures.
    """
    split_by_stage = draw_survival_curves(mut.features.ix[f], surv, stage, ann='p', show=True)
    all_surv = draw_survival_curves(mut.features.ix[f], surv, ann='p', show=True)
    # Drawn last on purpose: 'tmp.png' must hold this figure when it is put
    # next to the pathway plot below.
    curves = draw_survival_curves(mut.features.ix[f, df.columns], surv, ann='p',
                                  filename='tmp.png', show=True)
    try:
        # NOTE(review): plt.gca() is passed positionally and therefore binds
        # to pathway_plot's `clip` parameter, not `ax`; pathway_plot then
        # creates its own figure.  Left unchanged because switching to
        # ax=plt.gca() would change the size of the saved image -- confirm
        # which behaviour is wanted.
        pathway_plot(mut.df.ix[run.gene_sets[f], df.columns], plt.gca())
        plt.tight_layout()
        plt.savefig('tmp1.png', dpi=75, bbox_inches=0, pad_inches=0)
        plt.close('all')
        return stack([side_by_side(['tmp.png', 'tmp1.png']), split_by_stage, all_surv])
    except Exception:
        # Best-effort fallback (e.g. `f` is not a gene set).  Catching
        # Exception instead of a bare `except:` lets KeyboardInterrupt and
        # SystemExit through; half-drawn figures are closed so they do not
        # linger.
        plt.close('all')
        return stack([curves, split_by_stage, all_surv])
# One report per feature for the first fifteen rows of the sorted result
# table, stacked into a single display object.
top_features = mut_met.index[:15]
reports = [draw_me(f) for f in top_features]
s = stack(reports)
s